package com.sxw.github.webmagicdemo.webmagic;

import com.sxw.github.webmagicdemo.model.HouseInfo;
import org.apache.commons.lang3.math.NumberUtils;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.selector.Selectable;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * WebMagic page processor that turns every {@code <dl dataflag="bg">} node of a
 * crawled page into a {@link HouseInfo} and publishes the resulting list under the
 * {@code "houseInfos"} result field.
 *
 * <p>Listings whose summary text is missing or has fewer tokens than expected are
 * skipped (with a warning) so that one malformed entry cannot abort the whole page.
 */
@Component
public class FangProcess implements PageProcessor {

    private static final Logger LOGGER = Logger.getLogger(FangProcess.class.getName());

    /** Tokens at indices 1..3 of the summary text are read, so at least four are required. */
    private static final int MIN_INFO_TOKENS = 4;

    /**
     * Delimiter used to cut the numeric size out of the third summary token.
     * NOTE(review): this literal is U+FFFD (replacement character), which looks like
     * encoding damage in the source file - confirm the intended delimiter.
     */
    private static final String SIZE_DELIMITER = "�";

    private final Site site = Site.me().setRetryTimes(3).setSleepTime(100)
            .setUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.155 Safari/537.36");

    /**
     * Extracts all listings from the page and stores them as a
     * {@code List<HouseInfo>} in the page result field {@code "houseInfos"}.
     *
     * @param page the downloaded page handed over by the crawler
     */
    @Override
    public void process(Page page) {
        Html html = page.getHtml();
        List<Selectable> listingNodes = html.xpath("//dl[@dataflag='bg']").nodes();

        // SimpleDateFormat is not thread-safe, so it is kept local to this call
        // instead of being shared through a field.
        SimpleDateFormat yearFormat = new SimpleDateFormat("yyyy");

        List<HouseInfo> houseInfos = new ArrayList<HouseInfo>();
        for (Selectable listingNode : listingNodes) {
            HouseInfo houseInfo = toHouseInfo(listingNode, yearFormat);
            if (houseInfo != null) {
                houseInfos.add(houseInfo);
            }
        }
        page.putField("houseInfos", houseInfos);
    }

    /**
     * Builds a {@link HouseInfo} from a single listing node.
     *
     * @param listingNode one {@code <dl dataflag="bg">} node
     * @param yearFormat  formatter used to parse the four-digit year prefix
     * @return the populated listing, or {@code null} when the summary text is
     *         missing or has fewer than {@value #MIN_INFO_TOKENS} space-separated tokens
     */
    private HouseInfo toHouseInfo(Selectable listingNode, SimpleDateFormat yearFormat) {
        String tempInfo = listingNode.xpath("//p[@class='tel_shop']/text()").toString();
        if (tempInfo == null) {
            LOGGER.warning("Skipping listing: no tel_shop text found");
            return null;
        }

        String[] infos = tempInfo.split(" ");
        if (infos.length < MIN_INFO_TOKENS) {
            LOGGER.warning("Skipping listing: unexpected tel_shop text '" + tempInfo + "'");
            return null;
        }

        HouseInfo houseInfo = new HouseInfo();
        houseInfo.setTitle(listingNode.xpath("//span[@class='tit_shop']/text()").toString());
        houseInfo.setHouse_size(parseHouseSize(infos[2]));
        houseInfo.setHouse_type(infos[1]);
        houseInfo.setFloor_info(infos[3]);
        houseInfo.setVillage_name(listingNode.xpath("//p[@class='add_shop']/a/text()").toString());
        houseInfo.setVillage_site(listingNode.xpath("//p[@class='add_shop']/span/text()").toString());

        try {
            String yearToken = findYearToken(infos);
            Date createTime = yearToken == null ? null : yearFormat.parse(yearToken.substring(0, 4));
            houseInfo.setCreate_time(createTime);
        } catch (ParseException e) {
            // The listing is still kept; only its creation time stays unset.
            LOGGER.log(Level.WARNING, "Unparseable year in tel_shop text '" + tempInfo + "'", e);
        }
        return houseInfo;
    }

    /**
     * Parses the size from the part of {@code sizeToken} that follows
     * {@link #SIZE_DELIMITER}.
     *
     * @return the parsed value, or {@code 0.0} when the delimiter is absent or the
     *         remainder is not a number (the same default {@code NumberUtils.toDouble} uses)
     */
    private static double parseHouseSize(String sizeToken) {
        String[] parts = sizeToken.split(SIZE_DELIMITER);
        return parts.length > 1 ? NumberUtils.toDouble(parts[1]) : 0.0d;
    }

    /**
     * Picks the token whose first four characters are parsed as the year: the sixth
     * token of a six-token summary, or the fifth token of a five-token summary when
     * it is longer than four characters.
     *
     * @return the token, or {@code null} when no suitable token exists
     */
    private static String findYearToken(String[] infos) {
        if (infos.length == 6) {
            // Guard the later substring(0, 4) against tokens shorter than four characters.
            return infos[5].length() >= 4 ? infos[5] : null;
        }
        if (infos.length == 5 && infos[4].length() > 4) {
            return infos[4];
        }
        return null;
    }

    /**
     * @return the crawl configuration (retry count, sleep time, user agent) of this processor
     */
    @Override
    public Site getSite() {
        return site;
    }

}
